library(rmapshaper)
library(censusapi)
library(shapefiles)
library(base)
library(tidyverse)
library(tigris)
library(sf)
library(mapview)
library(raster)
library(rlang)
library(rgeos)
library(data.table)
library(measurements)
library(smoothr)
library(lwgeom)
library(units)
library(geosphere)
library(kableExtra)
library(leaflet)
library(htmltools)
library(plotly)
library(osrm)
mapviewOptions(
basemaps = "OpenStreetMap"
)
options(
tigris_class = "sf",
tigris_use_cache = TRUE,
osrm.server = "http://127.0.0.1:5000/",
osrm.profile = "walk"
)
Sys.setenv(CENSUS_KEY="b88495f8315f45b2d100dee9ba8f4c489a7371c2")
projection <- "+proj=utm +zone=10 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=ft +no_defs"
`%notin%` <- Negate(`%in%`)
Does the location of where one lives impact the ability to enjoy outdoor green space? Does the size of the yard at your house play a factor in whether you regularly visit parks? Are communities under-visiting parks that are physically closer to them? The goal of this analysis is to provide insight on park accessibility within communities in the City of San Jose. We will consider factors such as backyard size, park visit frequency, and distance from a park to determine a community’s park accessibility.
The first few sections describe our methodological approaches for preparing the factors for analysis, followed by sections detailing the comparisons between factors and the resulting conclusions.
The following describes how the Safegraph data is processed:
Safegraph collects device visit information to “places of interest” (POI) like parks or grocery stores and includes some information about a device’s origin census block group. The visit counts in the Safegraph dataset fall into either one of these three cases:
Note: We obtained all of the data for the park visits portion of the analysis from Safegraph Patterns Data.
Our goal for this section is to obtain the average residential yard size for each census block group in San Jose. We start with parcel data from the Santa Clara County Assessor’s Office, and filter it down to only include parcels with residential land use codes. The following list details the land use codes we used for this analysis:
After filtering to parcels with these residential use codes, we spatially match the building footprint shape associated with that parcel. To get from parcel area to yard area, we subtract the building footprint shape out of the parcel shape.
#load("P:/SFBI/Data Library/OSM/osm_bldg.Rdata")
#
#osm_bldg <-
# osm_bldg %>% st_transform(projection)
sj_boundary <-
places("CA", cb=FALSE) %>%
filter(NAME == "San Jose") %>%
st_transform(projection)
#
sj_blockgroup <-
readRDS("sj_blockgroups.rds") %>%
st_transform(projection)
#mapview(sj_blockgroup)
#load("P:/SFBI/Data Library/California Blocks/ca_blocks_lite.Rdata")
#ca_blocks_lite <- ca_blocks_lite %>% st_transform(projection)
#sj_blocks <- ca_blocks_lite[sj_boundary,]
#save(sj_blocks, file = "sj_blocks.Rdata")
#load("sj_blocks.Rdata")
#mapview(sj_blocks[100,])
# sj_bldg <-
# osm_bldg[sj_boundary,] %>%
# transmute(
# index = row_number()
# )
#save(sj_bldg, file = "sj_bldg.Rdata")
load("sj_bldg.Rdata")
# scc_parcels <- st_read("P:/SFBI/Data Library/Parcels/santa clara/scc.shp")
#
# scc_parcels <-
# scc_parcels %>%
# st_transform(projection)
#
# This includes parcels parcels at the periphery of San Jose. This is important for the "block method" used to find street edges. otherwise this sf object isn't used.
# sj_parcels_plus <-
# scc_parcels[sj_blocks,] %>%
# dplyr::select(APN, USE_CODE, SITUS_CITY)
# #
# sj_parcels <-
# sj_parcels_plus %>%
# filter(SITUS_CITY == "SAN JOSE")
# #
# save(sj_parcels_plus, sj_parcels, file = "sj_parcels.Rdata")
load("sj_parcels.Rdata")
# apn_bg_lookup <- data.frame(APN = NA, origin_census_block_group = NA)
#
# for(i in 1:nrow(sj_blockgroup)){
# if(i %% 10 == 0){print(i)}
# temp <- sj_blockgroup[i,]
# temp2 <- sj_parcels[temp,] %>%
# dplyr::select(APN) %>%
# st_set_geometry(NULL) %>%
# mutate(
# origin_census_block_group = temp$origin_census_block_group
# )
# apn_bg_lookup <-
# apn_bg_lookup %>%
# rbind(temp2)
# }
#
# apn_bg_lookup <-
# apn_bg_lookup %>%
# na.omit()
#
# saveRDS(apn_bg_lookup,"sj_apn_bg_lookup.rds")
apn_bg_lookup <- readRDS("sj_apn_bg_lookup.rds")
sj_parcels_residential <-
sj_parcels %>%
mutate(
USE_CODE = as.numeric(USE_CODE)
) %>%
filter(USE_CODE %in% c(1,2,3,4,6,7))
#mapview(head(sj_parcels_residential,100))
matched_bldg_footprint <-
sj_bldg %>%
st_centroid() %>%
st_intersection(sj_parcels_residential) %>%
st_set_geometry(NULL) %>%
left_join(sj_bldg, by = "index") %>%
st_as_sf()
#
#
residential_parcels <-
sj_parcels_residential %>%
mutate(
parcel_area = st_area(.) %>% as.numeric()
) %>%
as.data.frame() %>%
st_as_sf() %>%
rename(parcel_geometry = geometry)
#
#
residential_bldg <-
residential_parcels %>%
as.data.frame() %>%
right_join(
matched_bldg_footprint %>% dplyr::select(APN),
by = "APN"
) %>%
filter(!is.na(parcel_area)) %>%
rename(building_geometry = geometry) %>%
st_as_sf() %>%
st_set_geometry("building_geometry")
#
#
residential_parcels <-
residential_bldg %>%
st_set_geometry("parcel_geometry")
#
residential_parcels <- residential_parcels[!duplicated(residential_parcels), ]
##############################################################
# available_yard <-
# st_difference(
# residential_parcels[1:10000,],
# st_union(
# residential_parcels$building_geometry[1:10000,])
# )
#
# available_yard2 <-
# st_difference(
# residential_parcels[10001:20000,],
# st_union(
# residential_parcels$building_geometry[10001:20000,])
# )
#
# available_yard3 <-
# st_difference(
# residential_parcels[20001:30000,],
# st_union(
# residential_parcels$building_geometry[20001:30000,])
# )
# #
# available_yard4 <-
# st_difference(
# residential_parcels[30001:40000,],
# st_union(
# residential_parcels$building_geometry[30001:40000,])
# )
#
# available_yard5 <-
# st_difference(
# residential_parcels[40001:50000,],
# st_union(
# residential_parcels$building_geometry[40001:50000,])
# )
#
# available_yard6 <-
# st_difference(
# residential_parcels[50001:60000,],
# st_union(
# residential_parcels$building_geometry[50001:60000,])
# )
#
# available_yard7 <-
# st_difference(
# residential_parcels[60001:70000,],
# st_union(
# residential_parcels$building_geometry[60001:70000,])
# )
#
# available_yard8 <-
# st_difference(
# residential_parcels[70001:80000,],
# st_union(
# residential_parcels$building_geometry[70001:80000,])
# )
#
# available_yard9 <-
# st_difference(
# residential_parcels[80001:90000,],
# st_union(
# residential_parcels$building_geometry[80001:90000,])
# )
#
# available_yard10 <-
# st_difference(
# residential_parcels[90001:100000,],
# st_union(
# residential_parcels$building_geometry[90001:100000,])
# )
#
# available_yard10 <-
# st_difference(
# residential_parcels[100001:nrow(residential_parcels),],
# st_union(
# residential_parcels$building_geometry[100001:nrow(residential_parcels),])
# )
# all_available_yard <-
# available_yard %>%
# rbind(available_yard2) %>%
# rbind(available_yard3) %>%
# rbind(available_yard4) %>%
# rbind(available_yard5) %>%
# rbind(available_yard6) %>%
# rbind(available_yard7) %>%
# rbind(available_yard8) %>%
# rbind(available_yard9) %>%
# rbind(available_yard10)
#save(all_available_yard, file = "sj_all_yard_available.Rdata")
load("sj_all_yard_available.Rdata")
# all_available_yard_other <-
# all_available_yard %>%
# filter(USE_CODE == 2 | USE_CODE == 3)
#
# all_available_yard_apartment <-
# all_available_yard %>%
# filter(USE_CODE == 4)
#
# all_available_yard_other2 <-
# all_available_yard %>%
# filter(USE_CODE %in% c(6,7))
#
# available_area_fun <- function(df, residential_parcels){
# df %>%
# as.data.frame() %>%
# rename(geometry = parcel_geometry) %>%
# left_join(residential_parcels %>% dplyr::select(APN), by = "APN") %>%
# st_as_sf() %>%
# st_set_geometry("geometry") %>%
# as_Spatial() %>%
# gBuffer(byid = T, width = 0) %>%
# sp::disaggregate() %>%
# st_as_sf() %>%
# rename(available_area_geometry = geometry) %>%
# st_set_geometry("available_area_geometry") %>%
# mutate(
# available_area = round(st_area(.) %>% as.numeric())
# ) %>%
# st_set_crs(projection)
# }
#
#
# # save(available_area, file = "sj_available_backyard_area.Rdata")
#
# #!!!!This .Rdata file constians all of the yard areas for parcels zoned as single-family residential!!!!
# load("sj_available_backyard_area.Rdata")
#
#
# # PUC 2,3
# available_area1_other <- available_area_fun(all_available_yard_other[1:nrow(all_available_yard_other),], residential_parcels)
#
# available_area1_other <-
# available_area1_other %>%
# dplyr::select(APN, available_area, available_area_geometry)
#
#
# #PUC 6,7
# available_area2_other <- available_area_fun(all_available_yard_other2, residential_parcels)
#
# available_area2_other <-
# available_area2_other %>%
# dplyr::select(APN, available_area, available_area_geometry)
#
#
#
# available_area_apartment <-
# all_available_yard_apartment %>%
# mutate(
# bldg_area = st_area(building_geometry)
# ) %>%
# group_by(APN, parcel_area) %>%
# summarize(bldg_area_sum = sum(as.numeric(bldg_area))) %>%
# mutate(available_area = parcel_area - bldg_area_sum)
#
# available_area_apartment <-
# available_area_apartment %>%
# rename(available_area_geometry = building_geometry) %>%
# dplyr::select(APN,available_area,available_area_geometry) %>%
# mutate(
# available_area = ifelse(available_area < 0,0,available_area)
# )
#
# #combining all PUC dataframes
# available_area_combo <-
# available_area %>%
# rbind(available_area1_other) %>%
# rbind(available_area2_other) %>%
# rbind(available_area_apartment)
#
#
# nix_list <- c("69403028","41425018","67013004","67013003","67029024","74207011")
#
#
# nix_df <-
# available_area %>%
# filter(APN %in% nix_list) %>%
# left_join(
# all_available_yard %>%
# st_set_geometry(NULL) %>%
# dplyr::select(APN, USE_CODE) %>%
# filter(APN %in% nix_list),
# by = "APN"
# )
#
#
# available_area_bg <-
# available_area_combo %>%
# left_join(
# apn_bg_lookup %>%
# filter(APN %in% available_area$APN),
# by = "APN"
# ) %>%
# filter(APN %notin% nix_list)
#
# #
# bg_area_avg <-
# available_area_bg %>%
# st_set_geometry(NULL) %>%
# group_by(origin_census_block_group) %>%
# summarise(yard_area_avg = sum(available_area))
# #
# extra_bg <-
# sj_blockgroup %>%
# filter(origin_census_block_group %notin% bg_area_avg$origin_census_block_group) %>%
# st_set_geometry(NULL) %>%
# dplyr::select(origin_census_block_group) %>%
# mutate(yard_area_avg = 0)
# #
# bg_area_avg_all <-
# bg_area_avg %>%
# rbind(extra_bg) %>%
# left_join(
# sj_blockgroup %>%
# dplyr::select(-DISTRICTS),
# by = "origin_census_block_group"
# ) %>%
# na.omit() %>%
# st_as_sf() %>%
# st_transform(4326)
#save(bg_area_avg_all, file = "sj_bg_avg_yard_area.Rdata")
load("sj_bg_avg_yard_area.Rdata")
sj_population <-
getCensus(
name = "acs/acs5",
vintage = 2018,
region = "block group:*",
regionin = "state:06+county:085",
vars = c("group(B01003)")
) %>%
mutate(
origin_census_block_group =
paste0(state,county,tract,block_group)
) %>%
rename(
total_pop = "B01003_001E"
) %>%
dplyr::select(total_pop, origin_census_block_group)
sj_yard_pop_weight <-
bg_area_avg_all %>%
left_join(
sj_population %>%
filter(origin_census_block_group %in% bg_area_avg_all$origin_census_block_group),
by = "origin_census_block_group"
) %>%
mutate(
yard_per_capita = yard_area_avg / total_pop
)
The following map shows the void building footprint within the parcel shape for one block group. The resulting shape represents the yard area for each parcel.
# example_bg_yard <-
# available_area_bg %>%
# filter(origin_census_block_group == "060855006002")
#
# mapview(example_bg_yard)
In order to align our geographic granularity with the other factors of our analysis, we take the sum of all of the yard sizes within a block group. This value is then divided by the block group population to obtain the yard area per capita. The following map shows the yard area per capita sizes for all of the block groups in San Jose. Notice that the largest values occur near the left border of the city, near the hills.
bg_area_avg_minus <-
sj_yard_pop_weight
blue_pal2 <- colorNumeric(
palette = colorRamp(c("#FFFFFF", "#00288c"), interpolate="spline"),
domain =
bg_area_avg_minus %>%
pull(yard_per_capita) %>%
unique()
)
yard_bg_map_minus <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>% st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = bg_area_avg_minus,
fillColor = ~blue_pal2(yard_per_capita),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
#group = "Average Daily Park Visits",
label = ~paste0("Block Group Average Yard Size per Capita: ",round(yard_per_capita), " square feet"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)) %>%
addLegend(
position = "topright",
pal = blue_pal2,
values = bg_area_avg_minus$yard_per_capita,
title = "Avg. Yard Area per Capita (sqft.)",
opacity = 1
)
yard_bg_map_minus
Note: The data from the Santa Clara County assessor’s office likely includes multiple zoning misclassifications. For example, the following parcel has a Property Use Code of 1 (single-family residential), but it is actually a water treatment plant. This “yard” (that has an area of ~one million square feet) was inflating the yard size for the block group in which it is located.
# mapview(nix_df[7,])
We did a visual inspection of the single-family residential parcels in the block groups with large average yard sizes (over 30,000 sqft.), and removed the parcels that appeared to be misclassified. Keep in mind that there still may be other misclassified parcels that are being incorporated into the block group average yard size.
Some overall San Jose residential yard size statistics:
Mean San Jose Yard Area per Capita: 386 sq. ft.
Standard Deviation: 660 sq. ft.
Minimum San Jose Yard Size per Capita: 0 sq. ft.
Maximum San Jose Yard Size per Capita: 6300 sq. ft.
For now, I will put a hold on this analysis and move to the average number of people who visit San Jose parks that reside in these block groups. At the end, we will come back to these yard areas to compare against other accessibility factors.
When looking at park visitors who live in specific census block groups (CBGs), it is not sufficient to consider the CBG visit numbers at face value. The population of each block group plays an important factor in weighing a community’s visits to parks. For example, if a CBG has 3 residents, and they have an average of 3 daily park visits, then we can infer that this community has very active park visitors (even though the 3 park visits may seem low).
The following map displays all of the census block groups we considered in this analysis (see the Safegraph Data Collection Explanation section above for why some CBGs are missing). The “Park Visits” option illustrates the average daily number of visits that each CBG had to a SJ park since January 1, 2020. The “Park Visits per 1000” displays a ratio, which provides more insight into how actively a community visits parks (a higher ratio = more active, a lower ratio = less active).
####periodically update from sanjose_park_process file
park_origins <- readRDS("sj_park_origins.rds") %>%
filter(origin_census_block_group %in% sj_blockgroup$origin_census_block_group) %>%
mutate(month = month(date %>% as.Date()))
# park_origins_monthly <-
# park_origins %>%
# group_by(origin_census_block_group,month) %>%
# summarise(month_total = sum(mean_origin_visits)) %>%
# filter(month != 7)
park_geometry <-
readRDS('park_output_geometry.rds') %>%
st_as_sf() %>%
st_transform(projection)
park_origin_mean <-
park_origins %>%
group_by(origin_census_block_group) %>%
summarise(mean_visits = mean(mean_origin_visits))
visit_pop_combo <-
sj_population %>%
filter(origin_census_block_group %in% park_origin_mean$origin_census_block_group) %>%
left_join(
park_origin_mean,
by = "origin_census_block_group"
) %>%
na.omit() %>%
mutate(
visits_per_pop = round((mean_visits/total_pop)*1000)
) %>%
left_join(
sj_blockgroup %>%
dplyr::select(-DISTRICTS),
by = "origin_census_block_group"
) %>%
st_as_sf() %>%
st_transform(projection)
orange_pal <- colorNumeric(
palette = colorRamp(c("#fcd786", "#a67100"), interpolate="spline"),
domain =
visit_pop_combo %>%
pull(mean_visits) %>%
unique()
)
teal_pal <- colorNumeric(
palette = colorRamp(c("#d6fffe", "#015a68"), interpolate="spline"),
domain =
visit_pop_combo %>%
pull(visits_per_pop) %>%
unique()
)
all_visitors_map <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>% st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = visit_pop_combo %>%
st_transform(4326),
fillColor = ~orange_pal(mean_visits),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "Average Daily San Jose Park Visits",
label = ~paste0(round(mean_visits)," average daily park visits"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)) %>%
addPolygons(
data = visit_pop_combo %>%
st_transform(4326),
fillColor = ~teal_pal(visits_per_pop),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "Average Park Visits per 1000",
label = ~paste0(visits_per_pop," average daily park visits per 1000"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addLayersControl(
baseGroups = c("Average Daily San Jose Park Visits", "Average Park Visits per 1000"),
options = layersControlOptions(collapsed = FALSE)
)
all_visitors_map
Some interesting results to highlight:
The block group that has both the highest average daily park visits and highest visit/1000 people ratio is shown in the following view. Interestingly, this CBG contains the Santa Clara County jail and police office. Approximately 60% of the “residents” of this block group visit a San Jose park everyday. But who are these visitors? Inmates are not likely to have a cellular device. After a conversation with Stanford’s Reglab, we hypothesize that there are certain devices (tablets or phones) that police and correction officers use during the workday, but leave at the office during the night (which becomes the device’s home).
all_visitors_map %>%
setView(-121.906408, 37.350420, zoom = 15)
An additional block group that stands out is one located near Emma Prusch Farm Park. This CBG contains mainly an on-ramp, a portion of Bayshore Freeway, and a storage center. In the “Average Daily Park Visits” view, this CBG does not stand out. But when you switch to the “Average Park Visits per 1000” view, the CBG shows to have the second highest visits/population ratio in all of San Jose (although the daily park visit average is only 8, 53% of those people visit parks everyday). This may be an instance of a homeless encampment underneath or near the freeway on-ramp, whose residents visit the nearby Emma Prusch Farm Park quite frequently.
homeless <-
all_visitors_map %>%
showGroup("Average Park Visits per 1000") %>%
setView(-121.847157, 37.335857, zoom = 15)
homeless
What communities visit parks more or less frequently than others? We will now directly compare the CBGs with the lowest and highest park visits per 1000 ratios since January 1, 2020.
We considered a low visits per 1000 ratio to be less than 21, and a high ratio to be more than 109 (one SD less/more than the mean, respectively).
lowest_visitors <-
visit_pop_combo %>%
filter(visits_per_pop < mean(visit_pop_combo$visits_per_pop)-sd(visit_pop_combo$visits_per_pop)) %>%
st_as_sf() %>%
st_transform(projection)
purple_pal <- colorNumeric(
palette = colorRamp(c("#b27ef2", "#4900a3"), interpolate="spline"),
domain =
lowest_visitors %>%
pull(visits_per_pop) %>%
unique()
)
highest_visitors <-
visit_pop_combo %>%
filter(visits_per_pop > mean(visit_pop_combo$visits_per_pop)+sd(visit_pop_combo$visits_per_pop)) %>%
st_as_sf() %>%
st_transform(projection)
red_pal <- colorNumeric(
palette = colorRamp(c("#ff8585", "#9c0000"), interpolate="spline"),
domain =
highest_visitors %>%
pull(visits_per_pop) %>%
unique()
)
extremes_visitors_map <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>%
st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = lowest_visitors %>%
st_transform(4326),
fillColor = ~purple_pal(visits_per_pop),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "CBGs with Lowest Park Visit Ratio",
label = ~paste0(visits_per_pop," average daily park visits per 1000"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addPolygons(
data = highest_visitors %>%
st_transform(4326),
fillColor = ~red_pal(visits_per_pop),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "CBGs with Highest Park Visit Ratio",
label = ~paste0(visits_per_pop," average daily park visits per 1000"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addLayersControl(
overlayGroups = c("CBGs with Lowest Park Visit Ratio", "CBGs with Highest Park Visit Ratio"),
options = layersControlOptions(collapsed = FALSE)
)
extremes_visitors_map
#mapview(lowest_visitors, zcol = "visits_per_pop")
In the geographic sense, there appears to be no specific CBG pattern that would allow for predictive classification of a low or high visit/population ratio.
Our main goal for this section is to find the distance and walking duration of the nearest park from each San Jose census block group. We first start by finding the nearest point on a park boundary to the centroid of each census block group. The following map provides a visual representation of this process. The red shape is the census block group, the green shape is a park, and the purple line represents the shortest distance from the center of the block group to the edge of the park.
sj_bg_coords <-
sj_blockgroup %>%
st_centroid()
temp_nearest <- st_nearest_points(sj_bg_coords$geometry[1],park_geometry$SHAPE[1])
mapview(temp_nearest) + mapview(sj_blockgroup[1,], col.regions = "red") + mapview(park_geometry[1,], col.regions = "green")
This distance is “as the crow flies”, or straight-line distance, which is not an accurate representation of the actual route one would take in a urban environment. Taking into account the complex nature of streets, we use OSRM routing to calculate walking distances and durations between census block groups and the nearest point on a park boundary. The following map illustrates the more refined route that results from OSRM:
#
# nearest_points <- data.frame(origin_census_block_group = NA, geometry = NA, park_name = NA, nearest_point = NA)
#
# for(i in 1:nrow(sj_bg_coords)){
# for(j in 1:nrow(park_geometry)){
# temp_df <- data.frame(
# origin_census_block_group = sj_bg_coords$origin_census_block_group[i],
# geometry = sj_bg_coords$geometry[i],
# park_name = park_geometry$location_name[j]
# )
#
# temp_df <-
# temp_df %>%
# mutate(
# nearest_point = ifelse(length(st_intersection(st_nearest_points(sj_bg_coords$geometry[i],park_geometry$SHAPE[j]),park_geometry$SHAPE[j])) == 0, NA, st_intersection(st_nearest_points(sj_bg_coords$geometry[i],park_geometry$SHAPE[j]),park_geometry$SHAPE[j]))
# )
#
# nearest_points <-
# nearest_points %>%
# rbind(temp_df)
# }
#
# print(i)
#
# }
#
# nearest_points <-
# nearest_points %>%
# na.omit()
#
#
# save(nearest_points, file = "sj_park_bg_nearest_points.Rdata")
load("sj_park_bg_nearest_points.Rdata")
#
#
#
# cbg_names <-
# nearest_points %>%
# dplyr::select(origin_census_block_group)
#
# park_names <-
# nearest_points %>%
# dplyr::select(park_name)
#
# nearest_points_cbg <-
# nearest_points %>%
# st_as_sf() %>%
# st_set_crs(projection) %>%
# st_transform(4326) %>%
# rename(
# block_group_centroid = geometry
# ) %>%
# dplyr::select(origin_census_block_group,block_group_centroid) %>%
# st_coordinates() %>%
# cbind(cbg_names) %>%
# rename(
# "latcbg" = Y,
# "loncbg" = X
# )
#
# nearest_points_park <-
# nearest_points %>%
# st_set_geometry("nearest_point") %>%
# st_as_sf() %>%
# st_set_crs(projection) %>%
# st_transform(4326) %>%
# st_centroid() %>%
# dplyr::select(park_name,nearest_point) %>%
# st_coordinates() %>%
# cbind(park_names) %>%
# rename(
# "latpark" = Y,
# "lonpark" = X
# )
#
#
# nearest_points_all <-
# nearest_points_cbg %>%
# cbind(nearest_points_park) %>%
# na.omit()
#
#
# dist_park_osrm <-
# 1:nrow(nearest_points_all) %>%
# map_dfr(function(i){
# if(i%%50 == 0){print(i)}
# osrmRoute(
# src = nearest_points_all[i, c("origin_census_block_group", "loncbg","latcbg")],
# dst = nearest_points_all[i, c("park_name", "lonpark","latpark")],
# returnclass="sf"
# ) %>%
# as.data.frame()
# }) %>%
# st_as_sf() %>%
# st_set_crs(4326)
#
# save(dist_park_osrm, file = "sj_osrm_park_dist.Rdata")
load("sj_osrm_park_dist.Rdata")
mapview(dist_park_osrm[1,]) + mapview(sj_blockgroup[1,], col.regions = "red") + mapview(park_geometry[1,], col.regions = "green")
The OSRM method assumes a fixed walking speed of 5 km/hr, or about 20 minutes per mile. We used this method to find the nearest park to each census block group. The following map displays all of the census block groups in San Jose, color coded by the distance to the nearest park.
min_dist_park_osrm <-
dist_park_osrm %>%
as.data.frame() %>%
group_by(src) %>%
summarize(
rank = which.min(distance),
distance = distance[rank],
duration = duration[rank],
park_name = dst[rank]) %>%
rename(
origin_census_block_group = src
) %>%
dplyr::select(-rank) %>%
left_join(
sj_blockgroup,
by = "origin_census_block_group"
) %>%
st_as_sf() %>%
mutate(distance = distance / 1.60934)
green_pal <- colorNumeric(
palette = colorRamp(c("#95f599", "#007804"), interpolate="spline"),
domain =
min_dist_park_osrm %>%
pull(distance) %>%
unique()
)
min_dist_map <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>%
st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = min_dist_park_osrm %>%
st_transform(4326),
fillColor = ~green_pal(distance),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
#group = "CBGs with Lowest Park Visit Ratio",
label = ~paste0("The closest park is ",park_name,", which is ",round(distance,2), " miles away."),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addLegend(
position = "topright",
pal = green_pal,
values = min_dist_park_osrm$distance,
title = "Distance to Nearest Park (mi)",
opacity = 1
)
min_dist_map
#all_isochrones <- data.frame(geometry=NA,location_name=NA)[numeric(0),]
#test <- park_geometry %>% filter(!location_name %in% all_isochrones$location_name)
#I had to change the osrmIsochrone res to 15 for Lake Cunningham Park and Municipal Rifle Range
# for(i in 1:nrow(test)){ #140:nrow(park_geometry)
# print(i)
#
# boundary_coords <- test[i,] %>% st_coordinates()
#
# isochrone_hold <- data.frame(id=NA,min=NA,max=NA,geometry=NA,center=5)[numeric(0),]
#
# for(j in 1:nrow(boundary_coords)){
# print(nrow(boundary_coords))
# print(j)
# pt <- boundary_coords[j,1:2] %>%
# st_point %>%
# st_sfc() %>%
# st_sf() %>%
# st_set_crs(projection) %>%
# st_transform(4326)
#
# pt_isochrone <-
# pt %>%
# osrmIsochrone(
# returnclass="sf",
# breaks = 10,
# res = 15
# )
#
# isochrone_hold <-
# isochrone_hold %>%
# rbind(pt_isochrone)
#
# }
#
# isochrone_union <-
# isochrone_hold %>%
# st_union() %>%
# st_as_sf() %>%
# rename("geometry" = "x") %>%
# mutate(location_name = test$location_name[i])
#
# all_isochrones <-
# all_isochrones %>%
# rbind(isochrone_union)
#
# }
#save(all_isochrones, file = "sj_10min_park_isochrone.Rdata")
load("sj_10min_park_isochrone.Rdata")
According to Avi Yotam, who is the San Jose Parks Division Acting Deputy Director, a goal in parks and recreation departments across the nation is having a park within a 10-minute walk of all communities. By using OSRM Routing, we created isochrones that represent a 10 minute walk from any point on the boundary of San Jose Parks. In the following map, the “10 Minute Walking Distance from SJ Park Boundaries” option displays San Jose parks along with their 10 minute walking isochrones. The “CBGs within a 10 min. walk of a park” shows as the title describes.
walk_10min <-
dist_park_osrm %>%
as.data.frame() %>%
st_as_sf() %>%
st_drop_geometry() %>%
filter(duration <= 10) %>%
rename(
origin_census_block_group = src,
park_name = dst
) %>%
group_by(origin_census_block_group) %>%
summarise(duration = mean(duration)) %>%
left_join(
sj_blockgroup,
by = "origin_census_block_group"
) %>%
st_as_sf()
green_pal2 <- colorNumeric(
palette = colorRamp(c("#95f599", "#007804"), interpolate="spline"),
domain =
walk_10min %>%
pull(duration) %>%
unique()
)
min_10duration_map <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>%
st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = walk_10min %>%
st_transform(4326),
fillColor = ~green_pal2(duration),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "CBGs within a 10 min. walk of a park",
label = ~paste0("Walking time to the nearest park: ",round(duration)," mintues"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addPolygons(
data = all_isochrones %>%
st_transform(4326),
fillColor = "#93c400",
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "10 Minute Walking Distance from SJ Park Boundaries",
label = "10 Minute Walk Isochrone",
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
))%>%
addPolygons(
data = park_geometry %>%
st_transform(4326),
fillColor = "rgb(112,112,112)",
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "10 Minute Walking Distance from SJ Park Boundaries",
label = ~paste0(location_name),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
))%>%
addLayersControl(
baseGroups = c("10 Minute Walking Distance from SJ Park Boundaries", "CBGs within a 10 min. walk of a park"),
options = layersControlOptions(collapsed = FALSE)
)
min_10duration_map
mass_isochrone <-
all_isochrones %>%
st_union() %>%
st_as_sf() %>%
st_transform(4326)
original_area <- st_area(sj_boundary %>% st_transform(4326))
difference <- st_difference(sj_boundary %>% st_transform(4326), mass_isochrone)
new_area <- st_area(difference)
park_range_coverage <- 1-(as.numeric(new_area) / as.numeric(original_area))
On the city level, 38% of the land is within a 10 minute walking distance of a park, and 62% of the land is outside of this range. The next map illustrates which areas of San Jose fall inside/outside of the 10 minute walk zone.
inside_outside <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>%
st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = mass_isochrone %>%
st_transform(4326),
fillColor = "green",
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "Inside the 10 Minute Walking Zone",
label = ~paste0("Within a 10 minute walk to a park."),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addPolygons(
data = difference %>%
st_transform(4326),
fillColor = "blue",
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
group = "Outside the 10 Minute Walking Zone",
label = ~paste0("Not Within a 10 minute walk to a park."),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addLayersControl(
baseGroups = c("Inside the 10 Minute Walking Zone", "Outside the 10 Minute Walking Zone"),
options = layersControlOptions(collapsed = FALSE)
)
inside_outside
How does the OSRM Routing distance calculation method compare to the “as the crow flies” straight-line method? To illustrate the difference, we will use the distances between San Jose CBGs and their nearest park (displayed in the maps in the previous section). In almost every case, the OSRM distance is larger than the straight line distance, so the the numbers in the map legend result from: OSRM - as the crow flies. Note that the delta is greater in the CBGs on the periphery of San Jose.
#Comparison: As the crow flies and OSRM
# matched_points <- data.frame(origin_census_block_group = NA, geometry = NA, park_name = NA, nearest_point = NA)
#
# nearest_points <-
# nearest_points %>%
# as.data.frame()
#
# for(i in 1:nrow(nearest_points)){
# if(i%%500 == 0){print(i)}
# for(j in 1:nrow(min_dist_park_osrm)){
# if(nearest_points$origin_census_block_group[i] == min_dist_park_osrm$origin_census_block_group[j] & nearest_points$park_name[i] == min_dist_park_osrm$park_name[j]){
# matched_points <-
# matched_points %>%
# rbind(nearest_points[i,])
# }
# }
#
# }
#
#
# nearest_points_dist <-
# matched_points %>%
# na.omit() %>%
# st_set_geometry("geometry") %>%
# st_as_sf() %>%
# st_set_crs(projection) %>%
# st_transform(4326) %>%
# st_set_geometry("nearest_point") %>%
# st_as_sf() %>%
# st_set_crs(projection) %>%
# st_transform(4326) %>%
# mutate(
# crow_dist = 0
# )
#
# for(i in 1:nrow(nearest_points_dist)){
# nearest_points_dist$crow_dist[i] = as.numeric(st_length(st_nearest_points(nearest_points_dist$geometry[i],nearest_points_dist$nearest_point[i]))) / 1609.34
# }
#
# nearest_points_dist_mi <-
# nearest_points_dist %>%
# mutate(
# crow_dist = as.numeric(crow_dist / 1609.34) #convert to miles
# )
#
# save(nearest_points_dist, file = "sj_parks_crow_dist.Rdata")
load("sj_parks_crow_dist.Rdata")
osrm_crow <-
nearest_points_dist %>%
left_join(
min_dist_park_osrm %>%
st_set_geometry(NULL) %>%
dplyr::select(origin_census_block_group,distance),
by = "origin_census_block_group"
) %>%
mutate(dist_diff = distance - crow_dist) %>%
st_set_geometry(NULL) %>%
st_set_geometry("geometry") %>%
st_set_geometry(NULL) %>%
left_join(
sj_blockgroup,
by = "origin_census_block_group"
)
teal_pal2 <- colorNumeric(
palette = colorRamp(c("#d6fffe", "#015a68"), interpolate="spline"),
domain =
osrm_crow %>%
pull(dist_diff) %>%
unique()
)
osrm_crow_map <-
leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(
data = sj_boundary %>%
st_transform(4326),
fill = F,
color = "gray",
weight = 1,
label = ~NAME
) %>%
addPolygons(
data = osrm_crow %>%
st_as_sf() %>%
st_transform(4326),
fillColor = ~teal_pal2(dist_diff),
color = "black",
weight = 0.5,
opacity = 0.5,
fillOpacity = 0.75,
label = ~paste0("Delta between OSRM and 'as the crow flies': ",round(dist_diff,2)," miles"),
highlightOptions =
highlightOptions(
weight = 2.25,
opacity = 1
)
) %>%
addLegend(
position = "topright",
pal = teal_pal2,
values = osrm_crow$dist_diff,
title = "Delta between OSRM and 'as the crow flies' distances",
opacity = 1
)
osrm_crow_map
#
#
# park_10min_pop <-
# all_isochrones %>%
# mutate(
# pop_10min_park = NA
# )
#
# for(park in 7:nrow(all_isochrones)){
# print(park)
#
# sj_bg_iso <-
# sj_blockgroup %>%
# dplyr::select(-DISTRICTS) %>%
# st_transform(4326) %>%
# mutate(
# original_area = as.numeric(st_area(geometry)),
# new_geom = NA,
# new_geom_area = NA
# )
# for(i in 1:nrow(sj_bg_iso)){
# #if(i%%10 == 0){print(i)}
# sj_bg_iso$new_geom[i] <- st_intersection(sj_bg_iso$geometry[i],all_isochrones$geometry[park]) %>% st_as_sf()
#
# if(length(st_intersection(sj_bg_iso$geometry[i],all_isochrones$geometry[park])) != 0){
# sj_bg_iso$new_geom_area[i] <- as.numeric(st_area(st_intersection(sj_bg_iso$geometry[i],all_isochrones$geometry[park])))
# }else{
# sj_bg_iso$new_geom_area[i] <- 0
# }
# }
#
# sj_bg_iso_fraction <-
# sj_bg_iso %>%
# st_set_geometry(NULL) %>%
# mutate(
# iso_coverage_fraction = round(new_geom_area / original_area,2)
# ) %>%
# dplyr::select(origin_census_block_group,iso_coverage_fraction)
#
#
# sj_iso_pop <-
# sj_bg_iso_fraction %>%
# left_join(
# sj_population,
# by = "origin_census_block_group"
# ) %>%
# mutate(
# pop_10min_park = total_pop * iso_coverage_fraction
# )
#
# park_10min_pop$pop_10min_park[park] <- sum(sj_iso_pop$pop_10min_park)
# }
#
#
# save(park_10min_pop,file="sj_pop_10min_park.Rdata")
load("sj_pop_10min_park.Rdata")
To illustrate the difference further, the following scatter plot directly compares the distance values for each block group. The solid black line represents a 1:1 relationship, which the data would follow if the OSRM/as the crow flies values were identical. The points deviate away from the 1:1 line as the distances become greater, indicating that the OSRM distances are a more accurate representation of actual travel for parks that are further away from a CBG.
dist_scatter <-
osrm_crow %>%
ggplot(
aes(
x = crow_dist,
y = distance
)
) +
geom_abline(
mapping = NULL,
data = NULL,
1,
0,
na.rm = FALSE,
show.legend = TRUE
) +
geom_point() +
labs(
x = "As the Crow Flies Distance (mi.)",
y = "OSRM Routing Distance (mi.)",
title = "OSRM Routing and `As the Crow Flies` Distance Comparisons (CBGs to Nearest SJ Park)"
)
dist_scatter
# sj_income <-
# getCensus(
# name = "acs/acs5",
# vintage = 2018,
# region = "block group:*",
# regionin = "state:06+county:085",
# vars = c("group(B19001)")
# ) %>%
# mutate(
# origin_census_block_group =
# paste0(state,county,tract,block_group)
# ) %>%
# dplyr::select(-c(GEO_ID,state,NAME,contains("EA"),contains("MA"),contains("M"),county,tract,block_group)) %>%
# rename(
# "Total" = "B19001_001E"
# ) %>%
# mutate(
# percent_over_125k = round((B19001_015E+B19001_016E+B19001_017E) / Total * 100,2)
# ) %>%
# dplyr::select(origin_census_block_group,percent_over_125k) %>%
# filter(origin_census_block_group %in% sj_blockgroup$origin_census_block_group)
#
# sj_race <-
# getCensus(
# name = "acs/acs5",
# vintage = 2018,
# region = "block group:*",
# regionin = "state:06+county:085",
# vars = c("group(B02001)")
# ) %>%
# mutate(
# origin_census_block_group =
# paste0(state,county,tract,block_group)
# ) %>%
# dplyr::select(-c(GEO_ID,state,NAME,contains("EA"),contains("MA"),contains("M"),county,tract,block_group)) %>%
# mutate(
# percent_white = round((B02001_002E / B02001_001E) *100,2)
# ) %>%
# dplyr::select(origin_census_block_group,percent_white) %>%
# filter(origin_census_block_group %in% sj_blockgroup$origin_census_block_group)
sj_dem_data <- readRDS("Simone_Speizer/sj_socialdistancing_demdata_prepostdifs_manyvars.rds")
In our analysis, we quantify the correlations between minimum distance to a park, yard area per capita, park visits per 1000 people, and census block group averages of various demographic variables (including income, age, language ability, race, ethnicity, education level, vehicle ownership, and sex of workers). We choose to analyze these demographic variables based on our literature review of existing research on correlations with social distancing and our hypotheses of potentially relevant factors in San Jose and the Bay Area. Information on these demographic variables at the census block group level is from the American Community Survey 2014-2018 5 year summary data.
Hypothesis: People who have larger yards visit parks less.
yard_visits_tied1 <-
visit_pop_combo %>%
left_join(
sj_yard_pop_weight %>%
st_set_geometry(NULL) %>%
dplyr::select(yard_per_capita, origin_census_block_group),
by = "origin_census_block_group"
)%>%
left_join(
min_dist_park_osrm %>%
st_set_geometry(NULL) %>%
dplyr::select(origin_census_block_group,distance),
by = "origin_census_block_group"
) %>%
filter(yard_per_capita != 0 & yard_per_capita < 2600) %>%
left_join(
sj_dem_data,
by = c("origin_census_block_group" = "blockgroup")
) %>%
st_set_geometry(NULL)
yard_visits_tied <-
yard_visits_tied1
#here I normalize all of the values to make the coefficients more readable: (x - mean(x)) then (x / sd(x))
for(j in 4:30){
mean_hold <- mean(yard_visits_tied[,j])
for(i in 1:nrow(yard_visits_tied)){
yard_visits_tied[i,j] = yard_visits_tied[i,j] - mean_hold
}
}
for(j in 4:30){
sd_hold <- sd(yard_visits_tied[,j])
for(i in 1:nrow(yard_visits_tied)){
yard_visits_tied[i,j] = yard_visits_tied[i,j] / sd_hold
}
}
Yard Area per Capita as a Predictor for Park Visits per 1000 people.
Coefficient:
lm_yard_visits <- lm(yard_per_capita ~ visits_per_pop, yard_visits_tied)
print(summary.lm(lm_yard_visits)$coefficients, digits = 4, signif.stars=TRUE)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.502e-16 0.05295 -2.837e-15 1.000000
## visits_per_pop 1.464e-01 0.05303 2.760e+00 0.006082
R-Squared:
print(summary.lm(lm_yard_visits)$r.squared, digits = 4)
## [1] 0.02142
The following scatter plot represents a linear regression between the average residential yard size and average park visits per 1000 people for each block group in San Jose. There is a significant positive correlation between these two factors (p-value < 0.05), representing that in general, people who live in communities that have a larger yard area per capita tend to visit parks more than communities with smaller yard areas per capita. This correlation seems somewhat counterintuitive; we hypothesized that if one is lacking yard space, then that person would be more likely to seek open space in parks. In our next iteration of this report, we plan to account for confounding variables, such as median household income, race/ethnicity, and family make-up of San Jose CBGs. See the linear model summary after the plot for the statistical results.
lim_scatter <-
yard_visits_tied1 %>%
ggplot(
aes(
x = yard_per_capita,
y = visits_per_pop
)
) +
geom_point() +
geom_smooth(method=lm) +
labs(
x = "CBG Yard Area per Capita (sqft.)",
y = "CBG Average Daily Park Visits per 1000",
title = "SJ Park Accessibility: Parks Visits per 1000 vs. Residential Yard Area per Population"
)
lim_scatter
Note: In the above scatter plot, a 2,600 sqft yard area per capita threshold is implemented in order to exclude outliers. The following histogram depicts all yard area per capita values. The main curve ends at 2,300 sqft, and the outlier clusters begin around 2,900 sqft. As mentioned earlier, these large values may be attributed to zoning misclassifications, so we do not include them in the linear model.
yard_hist <- plot_ly(
x = sj_yard_pop_weight$yard_per_capita,
type = "histogram") %>%
layout(
title = "San Jose CBG Yard Area per Capita Distribution",
shapes=list(type='line', x0= 2600, x1= 2600, y0=0, y1=316, line=list(dash='dot', width=1)),
xaxis = list(
title = "Yard Area per Capita (sqft.)"
),
yaxis = list(
title = "Number of CBG Occurances"
),
annotations = list(
list(
xref = "x",
yref = "y",
x = 2600,
y = 316,
text = "Threshold",
xanchor = "center",
yanchor = "bottom",
showarrow = F
)
)
)
yard_hist
Minimum Distance to a Park as a Predictor for Park Visits per 1000 people.
Coefficient:
lm_visits_distance <- lm(visits_per_pop ~ distance, yard_visits_tied)
print(summary.lm(lm_visits_distance)$coefficients, digits = 4, signif.stars=TRUE)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.978e-16 0.05323 -3.715e-15 1.00000
## distance -1.059e-01 0.05330 -1.986e+00 0.04777
R-Squared:
print(summary.lm(lm_visits_distance)$r.squared, digits = 4)
## [1] 0.01121
P-value is less than 0.05, but the R-squared values is still quite low.
Visits vs. Distance Scatter
lim_scatter2 <-
yard_visits_tied1 %>%
ggplot(
aes(
x = distance,
y = visits_per_pop
)
) +
geom_point() +
geom_smooth(method=lm) +
labs(
x = "Distance to Nearest Park (mi.)",
y = "CBG Average Daily Park Visits per 1000",
title = "SJ Park Accessibility: Parks Visits per 1000 vs. Distance to the Nearest Park"
)
lim_scatter2
Minimum Distance to a Park and Yard Area per Capita as a predictor for Park Visits per 1000 People
Coefficient:
lm_visits_dist2 <- lm(visits_per_pop ~ distance + yard_per_capita, yard_visits_tied)
print(summary.lm(lm_visits_dist2)$coefficients, digits = 4, signif.stars=TRUE)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.673e-16 0.05236 -3.195e-15 1.0000000
## distance -1.644e-01 0.05495 -2.992e+00 0.0029714
## yard_per_capita 1.956e-01 0.05495 3.559e+00 0.0004242
R-Squared
print(summary.lm(lm_visits_dist2)$r.squared, digits = 4)
## [1] 0.04603
When combined, distance and yard area are better predictors for park visits than they are individually.
Demographic Factors (combined with distance and yard area) as predictors for visits per population
Coefficient:
lm_visits_dist3 <- lm(visits_per_pop ~ distance + yard_per_capita + `% hispanic/latino` + `% Asian` + `percent less than 1` + `percent elderly` ,yard_visits_tied)
print(summary.lm(lm_visits_dist3)$coefficients, digits = 4, signif.stars=TRUE)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.897e-18 0.04961 1.793e-16 1.000e+00
## distance -1.657e-01 0.05220 -3.175e+00 1.632e-03
## yard_per_capita 2.222e-01 0.05603 3.966e+00 8.915e-05
## `% hispanic/latino` -2.662e-01 0.08260 -3.222e+00 1.394e-03
## `% Asian` -2.763e-01 0.06213 -4.447e+00 1.180e-05
## `percent less than 1` -1.854e-01 0.07320 -2.532e+00 1.177e-02
## `percent elderly` 2.175e-01 0.05434 4.002e+00 7.703e-05
R-Squared:
print(summary.lm(lm_visits_dist3)$r.squared, digits = 4)
## [1] 0.1535
When factoring in % Hispanic/Latino, % Asian, % Less than One Year Old, and % Elderly in addition to distance to a park and yard area, we get the strongest predictor for park visits per population.
lm_yard_distance_pop <- lm(yard_per_capita ~ distance + visits_per_pop, yard_visits_tied)
print(summary.lm(lm_yard_distance_pop)$coefficients, digits = 4, signif.stars=TRUE)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.202e-16 0.05024 -2.393e-15 1.000e+00
## distance 3.183e-01 0.05060 6.290e+00 9.531e-10
## visits_per_pop 1.801e-01 0.05060 3.559e+00 4.242e-04
print(summary.lm(lm_yard_distance_pop)$r.squared, digits = 4)
## [1] 0.1216
lm_yard_vars <- lm(yard_per_capita ~ visits_per_pop + distance + `% white` + `% over 125,000` + `percent less than 1` + `percent associates or higher`, yard_visits_tied)
print(summary.lm(lm_yard_vars)$coefficients, digits = 4, signif.stars=TRUE)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.725e-16 0.04538 -6.005e-15 1.000e+00
## visits_per_pop 1.799e-01 0.04699 3.827e+00 1.539e-04
## distance 2.605e-01 0.04646 5.606e+00 4.260e-08
## `% white` -1.630e-01 0.04890 -3.334e+00 9.496e-04
## `% over 125,000` 3.500e-01 0.06938 5.045e+00 7.376e-07
## `percent less than 1` 2.905e-01 0.06118 4.748e+00 3.021e-06
## `percent associates or higher` -2.080e-01 0.07289 -2.854e+00 4.581e-03
print(summary.lm(lm_yard_vars)$r.squared, digits = 4)
## [1] 0.2915
lm_dist <- lm(distance ~ visits_per_pop + yard_per_capita, yard_visits_tied)
summary(lm_dist)
##
## Call:
## lm(formula = distance ~ visits_per_pop + yard_per_capita, data = yard_visits_tied)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6439 -0.5150 -0.0922 0.3140 7.8533
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.597e-17 5.050e-02 0.000 1.00000
## visits_per_pop -1.530e-01 5.113e-02 -2.992 0.00297 **
## yard_per_capita 3.216e-01 5.113e-02 6.290 9.53e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9448 on 347 degrees of freedom
## Multiple R-squared: 0.1124, Adjusted R-squared: 0.1073
## F-statistic: 21.98 on 2 and 347 DF, p-value: 1.031e-09
lm_dist2 <- lm(distance ~ visits_per_pop + yard_per_capita + `% white` + `% hispanic/latino` + `% Asian` + `% speaking english > well` + `% over 100,000` + `% over 125,000` + `percent elderly`, yard_visits_tied)
summary(lm_dist2)
##
## Call:
## lm(formula = distance ~ visits_per_pop + yard_per_capita + `% white` +
## `% hispanic/latino` + `% Asian` + `% speaking english > well` +
## `% over 100,000` + `% over 125,000` + `percent elderly`,
## data = yard_visits_tied)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6120 -0.4911 -0.1240 0.3717 7.4949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.555e-16 4.882e-02 0.000 1.000000
## visits_per_pop -1.734e-01 5.197e-02 -3.337 0.000941 ***
## yard_per_capita 2.860e-01 5.479e-02 5.219 3.13e-07 ***
## `% white` 3.679e-01 1.141e-01 3.224 0.001385 **
## `% hispanic/latino` 3.460e-01 1.053e-01 3.285 0.001125 **
## `% Asian` 5.381e-01 1.364e-01 3.945 9.68e-05 ***
## `% speaking english > well` 2.161e-01 7.848e-02 2.754 0.006212 **
## `% over 100,000` -5.183e-01 1.673e-01 -3.098 0.002110 **
## `% over 125,000` 4.119e-01 1.670e-01 2.467 0.014123 *
## `percent elderly` 9.115e-02 5.785e-02 1.576 0.116046
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9133 on 340 degrees of freedom
## Multiple R-squared: 0.1873, Adjusted R-squared: 0.1658
## F-statistic: 8.709 on 9 and 340 DF, p-value: 8.676e-12
park_visits <- readRDS("park_output.rds") %>%
filter(date > "2019-12-31" %>% as.Date()) %>%
mutate(
visit_avg = (visit_counts_high + visit_counts_low) / 2
) %>%
group_by(Name) %>%
summarise(visit_avg = round(mean(visit_avg)))
park_visits <- park_visits[-8,]
park_visit_pop_tied <-
park_visits %>%
left_join(
park_10min_pop %>%
st_set_geometry(NULL) %>%
rename("Name" = "location_name") %>%
mutate(
pop_10min_park = round(pop_10min_park)
),
by = "Name"
)
lim_mod4 <- lm(park_visit_pop_tied$visit_avg ~ park_visit_pop_tied$pop_10min_park)
lim_scatter4 <-
park_visit_pop_tied %>%
ggplot(
aes(
x = pop_10min_park,
y = visit_avg
)
) +
geom_point() +
geom_smooth(method=lm) +
labs(
x = "Population Within 10 Minutes of a Park",
y = "Average Daily Park Visits Since Jan. 1, 2020",
title = "SJ Park Accessibility: Population within 10 Minutes of a Park vs. Average Daily Park Visits"
)
lim_scatter4
summary(lim_mod4)
##
## Call:
## lm(formula = park_visit_pop_tied$visit_avg ~ park_visit_pop_tied$pop_10min_park)
##
## Residuals:
## Min 1Q Median 3Q Max
## -293.62 -106.12 -43.83 35.53 1217.49
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.199e+02 3.034e+01 3.953 0.00011 ***
## park_visit_pop_tied$pop_10min_park 2.575e-02 5.987e-03 4.301 2.77e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 187.2 on 183 degrees of freedom
## Multiple R-squared: 0.09179, Adjusted R-squared: 0.08683
## F-statistic: 18.5 on 1 and 183 DF, p-value: 2.767e-05
#Comparison: Developed Park Area vs. Average Daily Park Visits
dev_park_area <-
park_geometry %>%
st_set_geometry(NULL) %>%
dplyr::select(location_name,DEVACRES) %>%
rename(
"Name" = "location_name"
)
park_visit_dev_tied <-
park_visits %>%
left_join(
dev_park_area,
by = "Name"
)
lim_mod5 <- lm(park_visit_dev_tied$visit_avg ~ park_visit_dev_tied$DEVACRES)
lim_scatter5 <-
park_visit_dev_tied %>%
ggplot(
aes(
x = visit_avg,
y = DEVACRES
)
) +
geom_point() +
geom_smooth(method=lm) +
labs(
x = "Population Within 10 Minutes of a Park",
y = "Developed Acres within Park",
title = "SJ Park Accessibility: Park Developed Acres vs. Average Daily Park Visits"
)
lim_scatter5
summary(lim_mod5)
##
## Call:
## lm(formula = park_visit_dev_tied$visit_avg ~ park_visit_dev_tied$DEVACRES)
##
## Residuals:
## Min 1Q Median 3Q Max
## -508.92 -97.69 -41.90 41.83 1184.50
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 202.7222 14.4799 14.000 < 2e-16 ***
## park_visit_dev_tied$DEVACRES 4.5858 0.7916 5.793 2.96e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 180.6 on 183 degrees of freedom
## Multiple R-squared: 0.155, Adjusted R-squared: 0.1504
## F-statistic: 33.56 on 1 and 183 DF, p-value: 2.962e-08
#Comparison: Developed Park Area vs. Population Living Within 10 Minutes of a Park
park_pop_dev_tied <-
dev_park_area %>%
left_join(
park_10min_pop %>%
st_set_geometry(NULL) %>%
rename("Name" = "location_name") %>%
mutate(
pop_10min_park = round(pop_10min_park)
)
)
lim_mod6 <- lm(park_pop_dev_tied$DEVACRES ~ park_pop_dev_tied$pop_10min_park)
lim_scatter6 <-
park_pop_dev_tied %>%
ggplot(
aes(
x = pop_10min_park,
y = DEVACRES
)
) +
geom_point() +
geom_smooth(method=lm) +
labs(
x = "Average Daily Park Visits Since Jan. 1, 2020",
y = "Developed Acres within Park",
title = "SJ Park Accessibility: Population within 10 Minutes of a Park vs. Park Developed Acres"
)
lim_scatter6
summary(lim_mod6)
##
## Call:
## lm(formula = park_pop_dev_tied$DEVACRES ~ park_pop_dev_tied$pop_10min_park)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.108 -5.544 -2.586 0.617 171.000
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0203626 2.6975261 0.749 0.4548
## park_pop_dev_tied$pop_10min_park 0.0011693 0.0005324 2.196 0.0293 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.65 on 183 degrees of freedom
## Multiple R-squared: 0.02568, Adjusted R-squared: 0.02036
## F-statistic: 4.824 on 1 and 183 DF, p-value: 0.02932
# park_all_combo_dem <-
# park_visits %>%
# left_join(
# dev_park_area,
# by = "Name"
# ) %>%
# left_join(
# park_10min_pop %>%
# st_set_geometry(NULL) %>%
# rename("Name" = "location_name") %>%
# mutate(
# pop_10min_park = round(pop_10min_park)
# ),
# by = "Name"
# )